** 	Data construction for the mine panel and the lease data set
**	Created: January 2021
**  Notes: Merging mine measures with information from the lease directory, variable creation
**  Creates: FINAL_Replication_Mines, FINAL_Replication_Leases
*************************************************************************************

set more off
global mines = "F:/Dropbox (ESOC - Princeton)/Public services and conflict in India/Mining"
cd "$mines/Restat_Repository"

u "INPUT_Mines", clear


*****************************************
* 1. Create FINAL_Replication_Mines.dta
*****************************************


* CREATE TREATMENT VARIABLES
*---------------------------

xtset panelid year
gen period=year
gen bauxite_dum=mineral=="Bauxite"
gen iron_dum=mineral=="Iron Ore"
gen chromite_dum=mineral=="Chromite"
gen manganese_dum=mineral=="Manganese Ore"
gen iron=mineral=="Iron Ore" 
gen post=year>=2009
gen post1=inrange(year,2009,2011)
gen post2=year>=2012
gen treat_post=iron*inrange(year,2009,2013)
gen treat_post1=treat_post*inrange(year,2009,2011)
gen treat_post2=treat_post*inrange(year,2012,2013)
egen state_id=group(st_code)


* COMPLETE PANEL
*---------------

xtset panelid period

tsfill, full
by panelid: carryforward _all, replace

keep if period>=2007 & period <=2013
drop if period==2007 & year==.

xtset panelid period

gen freq=1


* AGGREGATE LEASE INFORMATION
*-------------------------------

* Illegal mining measures based on the aggregates in the village of the target mine
preserve
u "INPUT_Village_Leases.dta", clear
* Entries of the same area are assumed to be renewals of leases, so keep the most recent expiration date.
collapse (max) fullexpdate yearexp (max) sample, by(mineral vill_name area)
* When there are 2 measurements per village name (2 cases), assign the additional mines to the mine with the largest legal area.
gen vill_name_aggregate=vill_name
bys vill_name_aggregate mineral: egen area_max=max(area/(sample==1))
replace vill_name_aggregate=vill_name_aggregate+" - smallest mine" if round(area_max,.01)!=round(area,.01) & sample==1
* Aggregate areas and expiration dates:
bys  mineral vill_name_aggregate: egen area_vill=sum(area) 
bys  mineral vill_name_aggregate: egen exp_vill=max(yearexp)
bys  mineral vill_name_aggregate: egen fullexp_vill=max(fullexpdate)
bys  mineral vill_name_aggregate: egen sample_vill=max(sample)
tempfile vill_measures
save `vill_measures'
restore

merge m:1 mineral area vill_name using `vill_measures', keepusing(fullexpdate area_vill exp_vill fullexp_vill) gen(mergeY)
keep if mergeY==3


* CREATE ILLEGAL MINING PROXIES AND OUTCOMES
*--------------------------------------------

gen expired_vill=1 if fulldate>fullexp_vill & year!=. & fullexp_vill!=.
replace expired_vill=0 if fulldate<=fullexp_vill & year!=. & fullexp_vill!=.

* Illegal mining measures based on the target mine of the remote sensing firm
* These variables are created for completeness, but not included in the final replication file.
gen legal_area=area*10000
gen expired=1 if year>yearexp & year!=. & yearexp!=.
replace expired=0 if year<=yearexp & year!=. & yearexp!=.
label variable yearexp "Year of Expiry"
gen excess_dum=(area__sq_m-legal_area)>0 
gen excess_dum50=(area__sq_m-legal_area)/legal_area>0.5
bys panelid: egen expired_base=max(expired*inrange(year,2005,2008))

* Illegal mining measures based on the village aggregation:
qui gen legal_area_vill=area_vill*10000
qui gen excess_mining_vill=area__sq_m-legal_area_vill
qui gen excess_dum_vill=1 if excess_mining_vill>0 & excess_mining_vill!=.    
qui replace excess_dum_vill=0 if excess_mining_vill<=0 & excess_mining_vill!=.     
gen perc_illegal_vill=(excess_mining_vill/legal_area_vill)
qui gen excess_dum50_vill=1 if perc_illegal_vill>0.5 & perc_illegal_vill!=.  
qui replace excess_dum50_vill=0 if perc_illegal_vill<=0.5 & perc_illegal_vill!=. 
drop perc_illegal_vill excess_mining_vill
bys panelid: egen expired_base_vill=max(expired_vill*inrange(year,2005,2008))

* Create additional treatments:
bys panelid: egen counter=total(freq*(period>=2007))
gen imputed=(year!=period) & ( (inrange(year,2005,2006) & period==2007)==0)
gen treat_post1_exp=treat_post*inrange(year,2009,2011)*expired_base
gen treat_post2_exp=treat_post*inrange(year,2012,2013)*expired_base
gen treat_post1_exp_vill=treat_post*inrange(year,2009,2011)*expired_base_vill
gen treat_post2_exp_vill=treat_post*inrange(year,2012,2013)*expired_base_vill

* FULLY BALANCED PANEL
*----------------------

*Create outcomes for a fully balanced panel by assigning 2008 observations to 2007
preserve
keep if counter==6
keep if period==2008
replace period=2007
gen f_excess_dum_vill=excess_dum_vill
gen f_excess_dum50_vill=excess_dum50_vill
gen f_truck_active=truck_active
keep  period panelid counter f_excess_dum_vill f_excess_dum50_vill f_truck_active year expired_base_vill state_id treat_post1* treat_post2* 
tempfile add_obs
save `add_obs', replace
restore
merge m:1 panelid period using `add_obs'
gen excess_dum_vill_full=excess_dum_vill
gen truck_active_full=truck_active 
gen excess_dum50_vill_full=excess_dum50_vill
replace excess_dum_vill_full=f_excess_dum_vill if excess_dum_vill_full==.
replace truck_active_full=f_truck_active if truck_active_full==.
replace excess_dum50_vill_full=f_excess_dum50_vill if excess_dum50_vill_full==.

keep year period post excess_dum_vill excess_dum50_vill truck_active bauxite_dum iron_dum  chromite_dum manganese_dum expired_base_vill counter ///
treat_post treat_post1 treat_post2 treat_post1_exp_vill treat_post2_exp_vill freq  state_id ///
panelid  treat_post  imputed  excess_dum_vill_full  excess_dum50_vill_full   truck_active_full  

* LABELS
*--------

label var  year  "Year of measurement"
label var  period  "Panel Year"
label var  post  "Post 2009"
label var  excess_dum_vill  "Excess mining area (0-1)"
label var  excess_dum50_vill "Excess mining area, at least 50\% (0-1)"
label var  truck_active "Mine with active trucking"
label var  bauxite_dum  "Bauxite (0-1)"
label var  iron_dum "Iron Ore (0-1)"
label var  chromite_dum "Chromite (0-1)"
label var  manganese_dum "Manganese (0-1)"
label var  expired_base_vill "Lease expired before 2009"
label var counter "Number of years after imputation (2007-2013)"
label var treat_post "Iron Mine x Post"
label var treat_post1 "Iron Mine x (2009-2011)"
label var treat_post2 "Iron Mine x (2012-2013)"
label var treat_post1_exp_vill "Iron Mine x (2009-2011) x Expired"
label var treat_post2_exp_vill "Iron Mine x (2012-2013) x Expired"
label var freq "Main sample indicator"
label var state_id "State ID"
label var panelid "Panel ID"
label var treat_post "Iron Mine x Post"
label var imputed "Imputed observation"
label var excess_dum_vill_full  "Excess mining area (0-1), assigning '08 to '07 if missing"
label var excess_dum50_vill_full  "Excess mining area, at least 50\% (0-1), assigning '08 to '07 if missing"
label var truck_active_full  "Mine with active trucking, assigning '08 to '07 if missing"

save "FINAL_Replication_Mines", replace


*****************************************
* 2. Create FINAL_Replication_Leases.dta
*****************************************

u "INPUT_Leases.dta", clear
egen mine_id=group(area yearexp mineral)
label var mine_id  "Mine ID"
label var area "Legal area"
label var yearexp "Expiration Year"
label var mineral "Mineral"
preserve
u "INPUT_Mines", clear
collapse (first) mineral area yearexp, by(panelid)
tempfile aux
save `aux'
restore
merge m:1 mineral area yearexp using `aux', keepusing(panelid)
gen sample=panelid!=.
drop panelid
drop _merge
label var sample "Included in Mine panel"
save "FINAL_Replication_Leases", replace
